* Generate analysis samples
* 2/2/2025

* Start from a clean session; this also drops previously defined programs,
* so the `program` definitions below do not collide when the file is re-run.
clear all
* Do not pause output with --more-- prompts while the script runs.
set more off

program main
	* Entry point: builds and saves every analysis file by calling
	* prepare_data once per (design, sample) combination.

	* Primary regression files: actual SSI recipients under each of the 3 designs
	local designs state sc triple
	foreach d of local designs {
		prepare_data, design(`d') sample(actual_ssi)
	}

	* Robustness files: "likely SSI recipient" target samples, state design only
	local likely_samples max_fed120 ed_home
	foreach s of local likely_samples {
		prepare_data, design(state) sample(`s')
	}
end

program prepare_data
	* Build and save one analysis file for a given research design and
	* target sample.
	*
	* Options:
	*   design(str)  one of: sc, state, triple. Selects the raw input file,
	*                the household-type restrictions, and which treatment /
	*                interaction variables are generated.
	*   sample(str)  one of: actual_ssi, max_fed120, ed_home. Selects how the
	*                target sample of actual or likely SSI recipients is defined.
	*
	* Reads:  "$intermediate/sc_raw_0016" or "$intermediate/states_raw_0016"
	* Writes: "$for_analysis/`design'_for_regressions_`sample'" (replaced if present)
	* Requires the community-contributed command -labmask-.
	* Exits with error 198 if design() or sample() is not a recognised value.
	syntax, design(str) sample(str)
	
	* Fail fast on unknown options: otherwise an unrecognised design() would
	* skip every -use- below and operate on whatever data is already in memory,
	* and an unrecognised sample() would silently save an unrestricted sample.
	if !inlist("`design'", "sc", "state", "triple") {
		display as error "prepare_data: design() must be one of sc, state, triple"
		exit 198
	}
	if !inlist("`sample'", "actual_ssi", "max_fed120", "ed_home") {
		display as error "prepare_data: sample() must be one of actual_ssi, max_fed120, ed_home"
		exit 198
	}
	
	if "`design'" == "sc" {
		use "$intermediate/sc_raw_0016", clear
		keep if hhtype == 1 | hhtype == 4 | hhtype == 6 // household type explicitly live-alone or couple
	}
	else if "`design'" == "state" {
		use "$intermediate/states_raw_0016", clear
		* keep only live-alone HH types, or all individuals in states w/o a restriction to singles
		* NOTE(review): statefip 4 is listed among the CAP states without a singles
		* requirement in the triple branch below but is not exempted here -- confirm intentional.
		keep if (hhtype == 4 | hhtype == 6) | ///
			(statefip == 35 | statefip == 48 | statefip == 21 | statefip == 46) 
		drop if statefip == 35 & year > 2013 // New Mexico: only had the CAP for 5 years and then eliminated it
	}
	else if "`design'" == "triple" {
		use "$intermediate/states_raw_0016", clear
		drop if hhtype == 2 | hhtype == 3	// drop ambiguous household types
	}
	
	* state-specific eligibility criteria 
	drop if age < 65 & inlist(statefip, 37, 51, 34, 4) // these states require age 65+
	* zero wage income is required except in the listed states
	keep if incwage == 0 | inlist(statefip, 36, 37, 22, 21, 48, 4, 46) // no earned income
	
	drop if hhtype == 9 | hhtype == 0 // undetermined household types
	gen single = (hhtype == 4 | hhtype == 6) // lives alone
	
	* create education categories; check other covariates
	gen edcat = 1 if educd <= 61
	replace edcat = 2 if educd >= 62 & educd <= 64
	replace edcat = 3 if educd >= 65 & educd <= 100
	* Stata treats missing as larger than any number, so "educd > 100" alone
	* would code missing education as college grad; leave those rows missing.
	* NOTE(review): if educd also uses a numeric sentinel for missing, exclude
	* it here as well -- confirm against the codebook.
	replace edcat = 4 if educd > 100 & !mi(educd)
	lab def edlabel 1 "HS dropout" 2 "HS grad" 3 "some college" 4 "college grad"
	lab val edcat edlabel
	
	assert !mi(diffphys)
	
	* has_<policy> = 1 in years strictly after the policy's start year; a
	* missing start year yields 0 because missing compares above every year
	foreach policy in bbce call_any faceini facerec oapp no_fp { // other state SNAP policies
		gen has_`policy' = (year > `policy'_startyear)
	}

	* define the target sample of actual or likely SSI recipients
	if "`sample'" == "actual_ssi" {
		assert !mi(incsupp)
		keep if incsupp > 0 & incsupp != 99999  // code for missing is 99999
	}
	else if "`sample'" == "max_fed120" {
		gen married = (marst == 1) // married for SSI benefit determination (conservative)
		* household income at or below 120% of the applicable (individual or couple) amount
		keep if (married == 0 & hhincome <= 1.2*indiv) | ///
		(married == 1 & hhincome <= 1.2*couple)
	}
	else if "`sample'" == "ed_home" {
		keep if inlist(edcat, 1, 2)	 // education is HS dropout or HS graduate
		keep if ownershp == 2	// does not own home
	}
	
	* generate outcome and treatment variables
	gen snap = (foodstmp == 2)
	ren cap_startyear startyear
	replace startyear = 1995 if statefip == 45	// South Carolina
	
	* indicator for Standard (vs Modified) CAP
	gen standard = (statefip == 45 | statefip == 28 | ///
		statefip == 53 | statefip == 36 | statefip == 25 | statefip == 12 | statefip == 42)

	gen relyr = year - startyear
	replace relyr = -6 if relyr < -6  // bin large relative years
	* guard against missing: "relyr > 6" is true when relyr is missing (no
	* start year), which would otherwise put those rows in the +6 bin
	replace relyr = 6 if relyr > 6 & !mi(relyr)

	if "`design'" == "sc" { // restrict to states with single eligibility requirement
		keep if statefip == 45 | statefip == 28 | statefip == 53 | statefip == 36 | ///
			statefip == 25 | statefip == 12 | statefip == 37 | statefip == 42 | ///
			statefip == 51 | statefip == 22 | statefip == 26 | statefip == 34 | statefip == 24
		
		gen treat = (hhtype == 4 | hhtype == 6) // treatment is single and live alone
		replace relyr = 0 if treat == 0
		gen post = (year > startyear & !mi(startyear))
		gen inter = treat*post
	}
	else if "`design'" == "state" {
		replace relyr = 0 if startyear == .
		gen inter = (year > startyear & !mi(startyear))
	}
	else if "`design'" == "triple" {
		drop if inlist(statefip, 48, 21, 4, 46, 35) // drop states with a CAP but no singles requirement
		
		gen treated = (startyear != .)	// treated state
		replace relyr = 0 if mi(startyear)
		gen post = (year > startyear & !mi(startyear))	// post treatment year
		gen inter = (treated == 1 & post == 1)	// interaction of post and treat
		gen triple = (inter == 1 & single == 1)	// triple interaction
	}
	
	* consecutive positive index of relyr, with each index labelled by its relyr value
	egen relyr_pos = group(relyr)
	labmask relyr_pos, values(relyr)
	save "$for_analysis/`design'_for_regressions_`sample'", replace
end

* Execute: run main, which calls prepare_data for every design/sample
* combination and saves the resulting analysis files.
main

